/************************************************/
/************************************************/
/**** PART 1: DATA CLEAN  ****/
/************************************************/
/************************************************/




********************************************************************************
**************merge patient visit data with diagnostic testing
********************************************************************************

****collapse the task-level file to one row of diagnostic-test counts per visit

	use task, clear   //task.dta: task level data

*lab orders
	gen Lab = ordertaskcategory=="Lab"

*imaging orders, classified by modality
*ultrasound: flagged by task type, or a radiology task whose name contains "U/S" or starts with "US"
	gen US = ordertasktype=="Ultrasonography" | ///
		(ordertaskcategory=="Radiology" & (strpos(ordertask,"U/S")>0 | substr(ordertask,1,2)=="US"))
	gen CT = ordertaskcategory=="Radiology" & substr(ordertask,1,2)=="CT"
	gen MR = ordertaskcategory=="Radiology" & substr(ordertask,1,2)=="MR"
*every remaining radiology order is counted as X-ray
	gen Xray = ordertaskcategory=="Radiology" & !CT & !MR & !US

*number of orders of each kind within a visit
	foreach x in Lab CT MR US Xray {

		bysort visitnumber: egen `x'Count = total(`x')

	}

*keep a single row per visit (the counts are constant within a visit)
	bysort visitnumber: keep if _n==1
	keep visitnumber LabCount CTCount MRCount USCount XrayCount


***attach the counts to the patient-visit data

*NOTE(review): visits that appear only in task.dta are kept as rows with no visit information - confirm this is intended
	merge 1:1 visitnumber using EDvisit, nogenerate   //EDvisit.dta: visit-level data in 2 years

*visits without any task record have no count after the merge: set them to zero
	foreach x in Lab CT MR US Xray {

		replace `x'Count = 0 if `x'Count==.

	}



	
********************************************************************************
**************format date time
********************************************************************************

***convert the string dates to Stata daily dates

	foreach x in visitdate consultstartdate consultenddate {

		* keep the raw string under a _s suffix so the original name can hold the numeric date
		rename `x' `x'_s

		* strings beginning with "201" are read as year-month-day, all others as day-month-year
		gen `x' = cond(substr(`x'_s,1,3)=="201", date(`x'_s,"YMD"), date(`x'_s,"DMY"))
		format `x' %td

	}

	
	
***rewrite 12-hour AM/PM time strings as 24-hour strings

	foreach x in visittime consultstarttime consultendtime {

		* 24:00:00 is moved to the last second of the day
		replace `x' = "23:59:59" if `x'=="24:00:00"

		* AM: hours other than 12 keep their first 8 characters; 12 AM becomes hour 00
		replace `x' = substr(`x',1,8) if substr(`x',-2,2)=="AM" & substr(`x',1,2)!="12"
		replace `x' = "00"+substr(`x',3,6) if substr(`x',-2,2)=="AM" & substr(`x',1,2)=="12"

		* PM, hours 1-11: replace the hour with hour+12
		forvalues y = 1/11 {

			local h24 = `y' + 12
			* one-digit hour written as "h:"
			replace `x' = "`h24'"+substr(`x',2,6) if substr(`x',-2,2)=="PM" & substr(`x',1,2)=="`y':"
			* two-digit hour (10 and 11)
			replace `x' = "`h24'"+substr(`x',3,6) if substr(`x',-2,2)=="PM" & substr(`x',1,2)=="`y'"

		}

		* 12 PM keeps its hour; only the suffix is cut off
		replace `x' = substr(`x',1,8) if substr(`x',-2,2)=="PM" & substr(`x',1,2)=="12"

	}
	

***date_time: combine each daily date with its time string into a %tc date-time

	* the stems must match the variables built above: visit, consultstart, consultend
	foreach x in visit consultstart consultend {

		* string(date,"%td") always gives a day-first stamp such as 05mar2016,
		* so the combined string is parsed with the DMYhms mask only
		gen `x'_s = string(`x'date, "%td") + " " + `x'time

		* %tc values are millisecond counts and must be stored as double
		gen double `x'datetime = clock(`x'_s, "DMYhms")
		format `x'datetime %tc

		* the string pieces are no longer needed once the date-time exists
		drop `x'_s `x'date_s `x'time

	}


	
	
********************************************************************************
**************construct physician shift
* - within a physician, a case opens a new shift when the previous case ended
*   more than 6h before this case's end AND more than 4h before this case's start
* - otherwise the case belongs to the same shift as the previous one
********************************************************************************

	* timegap  : minutes between the end of the previous case and the end of this case
	* timegap1 : minutes between the end of the previous case and the start of this case
	* (%tc values are in milliseconds, hence the division by 60000)
	* the sort key is given in parentheses so the within-physician order is explicit
	bysort phyid (consultenddatetime): gen double timegap=(consultenddatetime-consultenddatetime[_n-1])/60000
	bysort phyid (consultenddatetime): gen double timegap1=(consultstartdatetime-consultenddatetime[_n-1])/60000
	sum timegap*,d

	* gap6 marks the first case of a shift; for the first case of each physician both
	* gaps are missing, and missing compares as larger than any number, so it is marked too
	bysort phyid (consultenddatetime): gen gap6=1 if timegap/60>6&timegap1/60>4
	* running count of shift openings = shift index within physician
	bysort phyid (consultenddatetime): gen shiftnumber=sum(gap6)

	* shift start = earliest consult start in the shift, shift end = latest consult end
	* stored as double: a float cannot hold a millisecond date-time without rounding
	* taking element [1] directly leaves shiftstart missing when no start time is known
	bysort phyid shiftnumber (consultstartdatetime): gen double shiftstart=consultstartdatetime[1]
	bysort phyid shiftnumber (consultenddatetime): gen double shiftend=consultenddatetime[_N]
	format shiftstart shiftend %tc

